#Data prep for replication
library(foreign)
library(cjoint)
library(plyr)
library(dplyr)
library(labelled)
library(expss)
library(tidyr)
library(haven)

# Working directory for all relative file names used below. The path is empty
# in this script; supply the folder that holds the input CSV files before
# running.
setwd("")

##Australia

# Load the raw Qualtrics export for Australia
Aus <- read.csv("Conjoint_Australia.csv")

# Keep a response only if it is finished, gave consent and met the quota
# requirements, and discard one response by a respondent who answered twice.
# Rows for which any of these comparisons is NA are dropped as well.
Aus <- subset(
  Aus,
  Finished != "0" &
    consent != "0" &
    t_t1_First.Click != "" &
    ResponseId != "R_1LLjoVOtuK1Jh9Y"
)

# Blank out the -99 code in the five conjoint outcome columns
columns_to_process <- c("ch1", "ch2", "ch3", "ch4", "ch5")
Aus[columns_to_process] <- lapply(Aus[columns_to_process], function(answers) {
  answers[answers == "-99"] <- ""
  answers
})

# Save the cleaned export; read.qualtrics() below reads it back from disk
write.csv(Aus, "Australia.csv", row.names = FALSE)

# Parse the conjoint design (five choice tasks) together with the covariates
AUSdata <- read.qualtrics(
  "Australia.csv",
  responses = c("ch1", "ch2", "ch3", "ch4", "ch5"),
  covariates = c(
    "male", "age", "educ", "emp", "hhinc", "zip", "equal", "ineq",
    "partyid", "vote", "rile", "state", "trust", "Duration..in.seconds.",
    "just"
  ),
  respondentID = "ResponseId",
  letter = "F",
  new.format = TRUE
)

# Fix variable names: each entry reads new_name = current_name. The covariate
# columns apparently come back from read.qualtrics under shifted names (e.g.
# the duration sits in `male`), and the conjoint attribute columns get short
# names (*_s = displayed level, *_row = row position).
AUSdata <- dplyr::rename(
  AUSdata,
  duration = male,
  male = age,
  age = educ,
  educ = emp,
  emp = hhinc,
  hhinc = zip,
  zip = equal,
  equal = partyid,
  partyid = rile,
  rile = trust,
  trust = just,
  just = respondent,
  ineq = vote,
  vote = state,
  state = Duration..in.seconds.,
  level_s = Annual.income,
  level_row = Annual.income.rowpos,
  tax_s = Percentage.of.income.paid.in.sales.taxes,
  tax_row = Percentage.of.income.paid.in.sales.taxes.rowpos,
  source_s = Source.of.income,
  source_row = Source.of.income.rowpos
)

# Integer respondent identifier: one id per distinct Response.ID
AUSdata <- ungroup(
  mutate(group_by(AUSdata, Response.ID), id = cur_group_id())
)

# Speeders are respondents who finished in less than half of the median time.
# Only rows at or above that cut-off are kept, then rows without a recorded
# choice are removed and the outcome column is renamed to `choice`.
AUSdata$duration <- as.numeric(AUSdata$duration)
p50 <- median(AUSdata$duration)
AUSdata <- AUSdata %>%
  filter(duration >= p50 / 2) %>%
  filter(selected != "NA") %>%
  rename(choice = selected)

# Create treatment variables.
# Each attribute starts at code 0 (the first factor label) and is overwritten
# when the displayed text matches one of the other levels. `tax2` and `income`
# carry the same codes as `tax` and `level` under generic labels.
AUSdata <- mutate(
  AUSdata,
  source = 0,
  source = ifelse(source_s == "Appointed by parent in company they direct", 1, source),
  source = ifelse(source_s == "Owns business that was bailed out by government", 2, source),
  source = ifelse(source_s == "Receives annuity from lottery prize", 3, source),
  source = factor(
    source,
    levels = c(0, 1, 2, 3),
    labels = c("Effort", "Social background", "State benefit", "Luck")
  ),
  tax = 0,
  tax = ifelse(tax_s == "5%", 1, tax),
  tax = ifelse(tax_s == "1%", 2, tax),
  tax = factor(tax, levels = c(0, 1, 2), labels = c("10%", "5%", "1%")),
  tax2 = 0,
  tax2 = ifelse(tax_s == "5%", 1, tax2),
  tax2 = ifelse(tax_s == "1%", 2, tax2),
  tax2 = factor(tax2, levels = c(0, 1, 2), labels = c("High", "Medium", "Low")),
  level = 0,
  level = ifelse(level_s == "$90,000", 1, level),
  level = ifelse(level_s == "$160,000", 2, level),
  level = factor(
    level,
    levels = c(0, 1, 2),
    labels = c("$40,000", "$90,000", "$160,000")
  ),
  income = 0,
  income = ifelse(level_s == "$90,000", 1, income),
  income = ifelse(level_s == "$160,000", 2, income),
  income = factor(income, levels = c(0, 1, 2), labels = c("Low", "Middle", "High"))
)

#Create variables for conditional analyses
# Each indicator starts at 0 ("Left-wing"; for `ideo`, 0 is "Centrist"), is
# set to 1 for the right-wing codes and to NA for the codes treated as missing.

# Party identification: 1 and 2 are coded right-wing; 0, 99 and -99 are set to
# missing. -99 is included so that it does not fall through to "Left-wing";
# the other recodes in this section and the Chile/Argentina party recodes
# already treat -99 as missing.
AUSdata <- AUSdata %>%
  mutate(r_party = 0) %>%
  mutate(r_party = ifelse(partyid %in% c(1, 2), 1, r_party)) %>%
  mutate(r_party = ifelse(partyid %in% c(0, 99, -99), NA, r_party))

AUSdata$r_party <- factor(AUSdata$r_party,
                          levels = c(0, 1),
                          labels = c("Left-wing", "Right-wing"))
var_label(AUSdata$r_party) <- "Identifies with right-wing party (vs left-wing)"

# Vote: codes strictly between 0 and 50 are right-wing; 0, 99, -99 are missing
AUSdata <- AUSdata %>%
  mutate(r_vote = 0) %>%
  mutate(r_vote = ifelse(vote > 0 & vote < 50, 1, r_vote)) %>%
  mutate(r_vote = ifelse(vote == 0 | vote == 99 | vote == -99, NA, r_vote))

AUSdata$r_vote <- factor(AUSdata$r_vote,
                         levels = c(0, 1),
                         labels = c("Left-wing", "Right-wing"))
var_label(AUSdata$r_vote) <- "Voted for right-wing party (vs left-wing)"

# Binary ideology: positive self-placement is right-wing; 0 and -99 are missing
AUSdata <- AUSdata %>%
  mutate(r_ideo = 0) %>%
  mutate(r_ideo = ifelse(rile > 0, 1, r_ideo)) %>%
  mutate(r_ideo = ifelse(rile == 0 | rile == -99, NA, r_ideo))

AUSdata$r_ideo <- factor(AUSdata$r_ideo,
                         levels = c(0, 1),
                         labels = c("Left-wing", "Right-wing"))
var_label(AUSdata$r_ideo) <- "Self-identifies as right-wing (vs left-wing)"

# Three-point ideology: positive = right, between -10 and 0 = left,
# 0 stays centrist, -99 is missing
AUSdata <- AUSdata %>%
  mutate(ideo = 0) %>%
  mutate(ideo = ifelse(rile > 0, 1, ideo)) %>%
  mutate(ideo = ifelse(rile < 0 & rile > -10, -1, ideo)) %>%
  mutate(ideo = ifelse(rile == -99, NA, ideo))

AUSdata$ideo <- factor(AUSdata$ideo,
                       levels = c(-1, 1, 0),
                       labels = c("Left-wing", "Right-wing", "Centrist"))
var_label(AUSdata$ideo) <- "3 point Ideological self-placement (left-centre-right)"

# Household income: numeric, with the -99 code set to missing
AUSdata$hhinc <- as.numeric(AUSdata$hhinc)
AUSdata$hhinc[AUSdata$hhinc == -99] <- NA

# Collapse income into four groups: <5, 5-8, 9-10 and >=11
# (a missing hhinc yields a missing hhinc4)
AUSdata <- AUSdata %>%
  mutate(hhinc4 = 0) %>%
  mutate(hhinc4 = ifelse(hhinc < 5, 1, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 5 & hhinc < 9, 2, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 9 & hhinc < 11, 3, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 11, 4, hhinc4))

AUSdata$hhinc4 <- factor(AUSdata$hhinc4,
                         levels = c(1, 2, 3, 4),
                         labels = c("1.Low", "2.Middle", "3.High", "4.Very high"))
var_label(AUSdata$hhinc4) <- "4 Income groups"

# Equal treatment: only 0 and 1 are valid levels, so every other value
# (including the -99 code) becomes NA when the factor is built
AUSdata$equal <- factor(AUSdata$equal,
                        levels = c(0, 1),
                        labels = c("No", "Yes"))
# Attach the label after factor(): factor() builds a new vector and does not
# carry over a label set beforehand
var_label(AUSdata$equal) <- "Equal treatment"

#Country identifier
AUSdata$country <- 2


##Chile

# Load the raw Qualtrics export for Chile
Ch <- read.csv("Conjoint_Chile.csv")

# Keep a response only if it is finished, gave consent and met the quota
# requirements; rows where any of these comparisons is NA are dropped as well
Ch <- subset(
  Ch,
  Finished != "0" & consent != "0" & t_t1_First.Click != ""
)

# Blank out the -99 code in the conjoint outcome columns
Ch[columns_to_process] <- lapply(Ch[columns_to_process], function(answers) {
  answers[answers == "-99"] <- ""
  answers
})

# Save the cleaned export; read.qualtrics() below reads it back from disk
write.csv(Ch, "Chile.csv", row.names = FALSE)

# Parse the conjoint design (five choice tasks) together with the covariates
CHdata <- read.qualtrics(
  "Chile.csv",
  responses = c("ch1", "ch2", "ch3", "ch4", "ch5"),
  covariates = c(
    "male", "age", "educ", "emp", "hhinc", "zip", "equal", "ineq",
    "partyid", "vote", "rile", "state", "trust", "Duration..in.seconds.",
    "just"
  ),
  respondentID = "ResponseId",
  letter = "F",
  new.format = TRUE
)

# Fix variable names: each entry reads new_name = current_name. The covariate
# columns apparently come back from read.qualtrics under shifted names, and
# the (Spanish) conjoint attribute columns get short names.
CHdata <- dplyr::rename(
  CHdata,
  duration = male,
  male = age,
  age = educ,
  educ = emp,
  emp = hhinc,
  hhinc = zip,
  zip = equal,
  equal = partyid,
  partyid = rile,
  rile = trust,
  trust = just,
  just = respondent,
  ineq = vote,
  vote = state,
  state = Duration..in.seconds.,
  level_s = Ingreso.mensual,
  level_row = Ingreso.mensual.rowpos,
  tax_s = "Porcentaje.del.ingreso.pagado.en.impuestos.al.consumo.(IVA)",
  tax_row = "Porcentaje.del.ingreso.pagado.en.impuestos.al.consumo.(IVA).rowpos",
  source_s = Fuente.de.ingresos,
  source_row = Fuente.de.ingresos.rowpos
)

# Integer respondent identifier: one id per distinct Response.ID
CHdata <- ungroup(
  mutate(group_by(CHdata, Response.ID), id = cur_group_id())
)

# Speeders are respondents who finished in less than half of the median time.
# Only rows at or above that cut-off are kept, then rows without a recorded
# choice are removed and the outcome column is renamed to `choice`.
CHdata$duration <- as.numeric(CHdata$duration)
p50 <- median(CHdata$duration)
CHdata <- CHdata %>%
  filter(duration >= p50 / 2) %>%
  filter(selected != "NA") %>%
  rename(choice = selected)

# Create treatment variables.
# Each attribute starts at code 0 (the first factor label) and is overwritten
# when the displayed text matches one of the other levels. `tax2` and `income`
# carry the same codes as `tax` and `level` under generic labels.
CHdata <- mutate(
  CHdata,
  source = 0,
  source = ifelse(source_s == "Consiguió trabajo a través de sus contactos familiares", 1, source),
  source = ifelse(source_s == "Es propietario/a de una empresa que recibe subsidios estatales", 2, source),
  source = ifelse(source_s == "Recibe una anualidad de un premio de lotería", 3, source),
  source = factor(
    source,
    levels = c(0, 1, 2, 3),
    labels = c("Effort", "Social background", "State benefit", "Luck")
  ),
  tax = 0,
  tax = ifelse(tax_s == "10%", 1, tax),
  tax = ifelse(tax_s == "5%", 2, tax),
  tax = factor(tax, levels = c(0, 1, 2), labels = c("15%", "10%", "5%")),
  tax2 = 0,
  tax2 = ifelse(tax_s == "10%", 1, tax2),
  tax2 = ifelse(tax_s == "5%", 2, tax2),
  tax2 = factor(tax2, levels = c(0, 1, 2), labels = c("High", "Medium", "Low")),
  level = 0,
  level = ifelse(level_s == "$800,000", 1, level),
  level = ifelse(level_s == "$1,500,000", 2, level),
  level = factor(
    level,
    levels = c(0, 1, 2),
    labels = c("$350,000", "$800,000", "$1,500,000")
  ),
  income = 0,
  income = ifelse(level_s == "$800,000", 1, income),
  income = ifelse(level_s == "$1,500,000", 2, income),
  income = factor(income, levels = c(0, 1, 2), labels = c("Low", "Middle", "High"))
)

# Create variables for conditional analyses.
# Each indicator starts at 0, is set to 1 for the right-wing codes and to NA
# for the codes treated as missing; `ideo` additionally uses -1 for left-wing.
two_way <- c("Left-wing", "Right-wing")
CHdata <- mutate(
  CHdata,
  # party identification: 1-3 right-wing; 0, 99, -99, 10 missing
  r_party = 0,
  r_party = ifelse(partyid %in% c(1, 2, 3), 1, r_party),
  r_party = ifelse(partyid %in% c(0, 99, -99, 10), NA, r_party),
  r_party = factor(r_party, levels = c(0, 1), labels = two_way),
  # vote: 1-2 right-wing; 0, 99, -99, 10 missing
  r_vote = 0,
  r_vote = ifelse(vote %in% c(1, 2), 1, r_vote),
  r_vote = ifelse(vote %in% c(0, 99, -99, 10), NA, r_vote),
  r_vote = factor(r_vote, levels = c(0, 1), labels = two_way),
  # binary ideology: positive placement right-wing; 0 and -99 missing
  r_ideo = 0,
  r_ideo = ifelse(rile > 0, 1, r_ideo),
  r_ideo = ifelse(rile %in% c(0, -99), NA, r_ideo),
  r_ideo = factor(r_ideo, levels = c(0, 1), labels = two_way),
  # three-point ideology: 1/2 right, -1/-2 left, -99 missing, rest centrist
  ideo = 0,
  ideo = ifelse(rile %in% c(1, 2), 1, ideo),
  ideo = ifelse(rile %in% c(-1, -2), -1, ideo),
  ideo = ifelse(rile == -99, NA, ideo),
  ideo = factor(
    ideo,
    levels = c(-1, 1, 0),
    labels = c("Left-wing", "Right-wing", "Centrist")
  )
)

var_label(CHdata$r_party) <- "Identifies with right-wing party (vs left-wing)"
var_label(CHdata$r_vote) <- "Voted for right-wing party (vs left-wing)"
var_label(CHdata$r_ideo) <- "Self-identifies as right-wing (vs left-wing)"
var_label(CHdata$ideo) <- "3 point Ideological self-placement (left-centre-right)"

# Household income: numeric, with the -99 code set to missing
CHdata$hhinc <- as.numeric(CHdata$hhinc)
CHdata$hhinc[CHdata$hhinc == -99] <- NA

# Collapse income into four groups: <4, 4-7, 8-9 and >=10
# (a missing hhinc yields a missing hhinc4)
CHdata <- CHdata %>%
  mutate(hhinc4 = 0) %>%
  mutate(hhinc4 = ifelse(hhinc < 4, 1, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 4 & hhinc < 8, 2, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 8 & hhinc < 10, 3, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 10, 4, hhinc4))

CHdata$hhinc4 <- factor(CHdata$hhinc4,
                        levels = c(1, 2, 3, 4),
                        labels = c("1.Low", "2.Middle", "3.High", "4.Very high"))
var_label(CHdata$hhinc4) <- "4 Income groups"

# Equal treatment: only 0 and 1 are valid levels, so every other value
# (including the -99 code) becomes NA when the factor is built
CHdata$equal <- factor(CHdata$equal,
                       levels = c(0, 1),
                       labels = c("No", "Yes"))
# Attach the label after factor(): factor() builds a new vector and does not
# carry over a label set beforehand
var_label(CHdata$equal) <- "Equal treatment"

#Country identifier
CHdata$country <- 4


##Argentina

# Load the raw Qualtrics export for Argentina
Arg <- read.csv("Conjoint_Argentina.csv")

# Keep a response only if it is finished, gave consent and met the quota
# requirements; rows where any of these comparisons is NA are dropped as well
Arg <- subset(
  Arg,
  Finished != "0" & consent != "0" & t_t1_First.Click != ""
)

# Blank out the -99 code in the conjoint outcome columns
Arg[columns_to_process] <- lapply(Arg[columns_to_process], function(answers) {
  answers[answers == "-99"] <- ""
  answers
})

# Save the cleaned export; read.qualtrics() below reads it back from disk
write.csv(Arg, "Argentina.csv", row.names = FALSE)

# Parse the conjoint design (five choice tasks) together with the covariates
ARGdata <- read.qualtrics(
  "Argentina.csv",
  responses = c("ch1", "ch2", "ch3", "ch4", "ch5"),
  covariates = c(
    "male", "age", "educ", "emp", "hhinc", "zip", "equal", "ineq",
    "partyid", "vote", "rile", "state", "trust", "Duration..in.seconds.",
    "just"
  ),
  respondentID = "ResponseId",
  letter = "F",
  new.format = TRUE
)

# Fix variable names: each entry reads new_name = current_name. The covariate
# columns apparently come back from read.qualtrics under shifted names, and
# the (Spanish) conjoint attribute columns get short names.
ARGdata <- dplyr::rename(
  ARGdata,
  duration = male,
  male = age,
  age = educ,
  educ = emp,
  emp = hhinc,
  hhinc = zip,
  zip = equal,
  equal = partyid,
  partyid = rile,
  rile = trust,
  trust = just,
  just = respondent,
  ineq = vote,
  vote = state,
  state = Duration..in.seconds.,
  level_s = Ingreso.mensual,
  level_row = Ingreso.mensual.rowpos,
  tax_s = "Porcentaje.del.ingreso.pagado.en.impuestos.al.consumo.(IVA)",
  tax_row = "Porcentaje.del.ingreso.pagado.en.impuestos.al.consumo.(IVA).rowpos",
  source_s = Fuente.de.ingresos,
  source_row = Fuente.de.ingresos.rowpos
)

# Integer respondent identifier: one id per distinct Response.ID
ARGdata <- ungroup(
  mutate(group_by(ARGdata, Response.ID), id = cur_group_id())
)

# Speeders are respondents who finished in less than half of the median time.
# Only rows at or above that cut-off are kept, then rows without a recorded
# choice are removed and the outcome column is renamed to `choice`.
ARGdata$duration <- as.numeric(ARGdata$duration)
p50 <- median(ARGdata$duration)
ARGdata <- ARGdata %>%
  filter(duration >= p50 / 2) %>%
  filter(selected != "NA") %>%
  rename(choice = selected)

# Create treatment variables.
# Each attribute starts at code 0 (the first factor label) and is overwritten
# when the displayed text matches one of the other levels. `tax2` and `income`
# carry the same codes as `tax` and `level` under generic labels.
ARGdata <- mutate(
  ARGdata,
  source = 0,
  source = ifelse(source_s == "Consiguió trabajo a través de sus contactos familiares", 1, source),
  source = ifelse(source_s == "Es propietario/a de una empresa que recibe subsidios estatales", 2, source),
  source = ifelse(source_s == "Recibe una anualidad de un premio de lotería", 3, source),
  source = factor(
    source,
    levels = c(0, 1, 2, 3),
    labels = c("Effort", "Social background", "State benefit", "Luck")
  ),
  tax = 0,
  tax = ifelse(tax_s == "10%", 1, tax),
  tax = ifelse(tax_s == "5%", 2, tax),
  tax = factor(tax, levels = c(0, 1, 2), labels = c("15%", "10%", "5%")),
  tax2 = 0,
  tax2 = ifelse(tax_s == "10%", 1, tax2),
  tax2 = ifelse(tax_s == "5%", 2, tax2),
  tax2 = factor(tax2, levels = c(0, 1, 2), labels = c("High", "Medium", "Low")),
  level = 0,
  level = ifelse(level_s == "$60,000", 1, level),
  level = ifelse(level_s == "$100,000", 2, level),
  level = factor(
    level,
    levels = c(0, 1, 2),
    labels = c("$25,000", "$60,000", "$100,000")
  ),
  income = 0,
  income = ifelse(level_s == "$60,000", 1, income),
  income = ifelse(level_s == "$100,000", 2, income),
  income = factor(income, levels = c(0, 1, 2), labels = c("Low", "Middle", "High"))
)

# Create variables for conditional analyses.
# Each indicator starts at 0, is set to 1 for the right-wing codes and to NA
# for the codes treated as missing; `ideo` additionally uses -1 for left-wing.
two_way <- c("Left-wing", "Right-wing")
ARGdata <- mutate(
  ARGdata,
  # party identification: 1-2 right-wing; 0, 99, -99 missing
  r_party = 0,
  r_party = ifelse(partyid %in% c(1, 2), 1, r_party),
  r_party = ifelse(partyid %in% c(0, 99, -99), NA, r_party),
  r_party = factor(r_party, levels = c(0, 1), labels = two_way),
  # vote: 1-3 right-wing; 0, 99, -99, 10 missing
  r_vote = 0,
  r_vote = ifelse(vote %in% c(1, 2, 3), 1, r_vote),
  r_vote = ifelse(vote %in% c(0, 99, -99, 10), NA, r_vote),
  r_vote = factor(r_vote, levels = c(0, 1), labels = two_way),
  # binary ideology: positive placement right-wing; 0 and -99 missing
  r_ideo = 0,
  r_ideo = ifelse(rile > 0, 1, r_ideo),
  r_ideo = ifelse(rile %in% c(0, -99), NA, r_ideo),
  r_ideo = factor(r_ideo, levels = c(0, 1), labels = two_way),
  # three-point ideology: 1/2 right, -1/-2 left, -99 missing, rest centrist
  ideo = 0,
  ideo = ifelse(rile %in% c(1, 2), 1, ideo),
  ideo = ifelse(rile %in% c(-1, -2), -1, ideo),
  ideo = ifelse(rile == -99, NA, ideo),
  ideo = factor(
    ideo,
    levels = c(-1, 1, 0),
    labels = c("Left-wing", "Right-wing", "Centrist")
  )
)

var_label(ARGdata$r_party) <- "Identifies with right-wing party (vs left-wing)"
var_label(ARGdata$r_vote) <- "Voted for right-wing party (vs left-wing)"
var_label(ARGdata$r_ideo) <- "Self-identifies as right-wing (vs left-wing)"
var_label(ARGdata$ideo) <- "3 point Ideological self-placement (left-centre-right)"

# Household income: numeric, with the -99 code set to missing
ARGdata$hhinc <- as.numeric(ARGdata$hhinc)
ARGdata$hhinc[ARGdata$hhinc == -99] <- NA

# Collapse income into four groups: <4, 4-7, 8-9 and >=10
# (a missing hhinc yields a missing hhinc4)
ARGdata <- ARGdata %>%
  mutate(hhinc4 = 0) %>%
  mutate(hhinc4 = ifelse(hhinc < 4, 1, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 4 & hhinc < 8, 2, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 8 & hhinc < 10, 3, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 10, 4, hhinc4))

ARGdata$hhinc4 <- factor(ARGdata$hhinc4,
                         levels = c(1, 2, 3, 4),
                         labels = c("1.Low", "2.Middle", "3.High", "4.Very high"))
var_label(ARGdata$hhinc4) <- "4 Income groups"

# Equal treatment: only 0 and 1 are valid levels, so every other value
# (including the -99 code) becomes NA when the factor is built
ARGdata$equal <- factor(ARGdata$equal,
                        levels = c(0, 1),
                        labels = c("No", "Yes"))
# Attach the label after factor(): factor() builds a new vector and does not
# carry over a label set beforehand
var_label(ARGdata$equal) <- "Equal treatment"

#Country identifier
ARGdata$country <- 3


##US

# Parse the US Qualtrics export directly (five choice tasks plus covariates)
USdata <- read.qualtrics(
  "Conjoint_USA.csv",
  responses = c("Q26", "Q65", "Q67", "Q69", "Q71"),
  covariates = c(
    "Q19", "Q43", "Q42", "Q52", "Q44", "Q15", "Q17", "Q38", "Q30", "Q51",
    "Q40", "Q50", "Q46", "Q34", "Q48", "Q18", "Duration..in.seconds.",
    "Finished"
  ),
  respondentID = "ResponseId",
  letter = "F",
  new.format = TRUE
)

# Fix variable names: each entry reads new_name = current_name. The covariate
# columns apparently come back from read.qualtrics under shifted names (e.g.
# the vote variable sits in `Finished`), and the conjoint attribute columns
# get short names (*_s = displayed level, *_row = row position).
USdata <- dplyr::rename(
  USdata,
  zip = Q34,
  equal = Q52,
  ineq = Q44,
  partyid = Q48,
  finished = Q43,
  vote = Finished,
  rile = Q18,
  duration = Q19,
  male = Q15,
  age = Q17,
  educ = Q40,
  emp = Q50,
  hhinc = Q46,
  ethnicity = respondent,
  marital = Q30,
  bornus = Q51,
  voted = Duration..in.seconds.,
  just = Q42,
  level_s = Annual.income,
  level_row = Annual.income.rowpos,
  tax_s = Percentage.of.income.paid.in.sales.taxes,
  tax_row = Percentage.of.income.paid.in.sales.taxes.rowpos,
  source_s = Source.of.income,
  source_row = Source.of.income.rowpos
)

# Keep finished responses only
USdata <- subset(USdata, finished != "0")

# Recode variables
# Code 2 becomes 0 for `equal` and `male`
for (two_coded in c("equal", "male")) {
  USdata[[two_coded]][USdata[[two_coded]] == 2] <- 0
}
# `age` is replaced by 2017 minus its numeric value
USdata$age <- 2017 - as.numeric(USdata$age)
# bornus: code 5 becomes 1, code 6 becomes 0
USdata$bornus[USdata$bornus == 5] <- 1
USdata$bornus[USdata$bornus == 6] <- 0
# Empty strings become missing
for (blank_coded in c("vote", "partyid", "rile", "hhinc")) {
  USdata[[blank_coded]][USdata[[blank_coded]] == ""] <- NA
}

# Integer respondent identifier: one id per distinct Response.ID
USdata <- ungroup(
  mutate(group_by(USdata, Response.ID), id = cur_group_id())
)

# Speeders are respondents who finished in less than half of the median time;
# only rows at or above that cut-off are kept. The outcome column is then
# renamed to `choice`.
USdata$duration <- as.numeric(USdata$duration)
p50 <- median(USdata$duration)
USdata <- USdata %>%
  filter(duration >= p50 / 2) %>%
  rename(choice = selected)

# Create treatment variables.
# Each attribute starts at code 0 (the first factor label) and is overwritten
# when the displayed text matches one of the other levels. `tax2` and `income`
# carry the same codes as `tax` and `level` under generic labels.
USdata <- mutate(
  USdata,
  source = 0,
  source = ifelse(source_s == "Got a job through family connections", 1, source),
  source = ifelse(source_s == "Owns business that was bailed out by government", 2, source),
  source = ifelse(source_s == "Receives annuity from lottery prize", 3, source),
  source = factor(
    source,
    levels = c(0, 1, 2, 3),
    labels = c("Effort", "Social background", "State benefit", "Luck")
  ),
  tax = 0,
  tax = ifelse(tax_s == "5%", 1, tax),
  tax = ifelse(tax_s == "1%", 2, tax),
  tax = factor(tax, levels = c(0, 1, 2), labels = c("10%", "5%", "1%")),
  tax2 = 0,
  tax2 = ifelse(tax_s == "5%", 1, tax2),
  tax2 = ifelse(tax_s == "1%", 2, tax2),
  tax2 = factor(tax2, levels = c(0, 1, 2), labels = c("High", "Medium", "Low")),
  level = 0,
  level = ifelse(level_s == "$80,000", 1, level),
  level = ifelse(level_s == "$150,000", 2, level),
  level = factor(
    level,
    levels = c(0, 1, 2),
    labels = c("$30,000", "$80,000", "$150,000")
  ),
  income = 0,
  income = ifelse(level_s == "$80,000", 1, income),
  income = ifelse(level_s == "$150,000", 2, income),
  income = factor(income, levels = c(0, 1, 2), labels = c("Low", "Middle", "High"))
)

# Drop tasks in which both profiles had the same attributes.
# Within each respondent-task pair a row is flagged (same = 1) when its
# source, tax and level all equal those of the previous row in the pair.
USdata <- USdata %>%
  mutate(same = 0) %>%
  arrange(id, task) %>%
  group_by(id, task) %>%
  mutate(same = ifelse(
    source == lag(source) & tax == lag(tax) & level == lag(level), 1, same
  ))
# Rows with nothing to compare against come out as NA: treat as not flagged
USdata$same[USdata$same %in% NA] <- 0

# Spread the flag over the whole task and remove every row of flagged tasks
USdata <- USdata %>%
  group_by(id, task) %>%
  mutate(test = max(same)) %>%
  ungroup() %>%
  filter(test != 1)

# Export justifications: the columns used later for the wordcloud data
justUS <- select(
  USdata,
  Response.ID, just, task, profile, tax, level, source, choice
)

# Create variables for conditional analyses.
# Each indicator starts at 0, is set to 1 for the right-wing codes and to NA
# for the codes treated as missing; `ideo` additionally uses -1 for left-wing.
two_way <- c("Left-wing", "Right-wing")
USdata <- mutate(
  USdata,
  # party identification: 1 right-wing; 3 and 4 missing
  r_party = 0,
  r_party = ifelse(partyid == 1, 1, r_party),
  r_party = ifelse(partyid %in% c(3, 4), NA, r_party),
  r_party = factor(r_party, levels = c(0, 1), labels = two_way),
  # vote: 1 right-wing; 3 and 4 missing
  r_vote = 0,
  r_vote = ifelse(vote == 1, 1, r_vote),
  r_vote = ifelse(vote %in% c(3, 4), NA, r_vote),
  r_vote = factor(r_vote, levels = c(0, 1), labels = two_way),
  # binary ideology: 1/2 right-wing; 3 and NA missing
  r_ideo = 0,
  r_ideo = ifelse(rile %in% c(1, 2), 1, r_ideo),
  r_ideo = ifelse(rile %in% c(3, NA), NA, r_ideo),
  r_ideo = factor(r_ideo, levels = c(0, 1), labels = two_way),
  # three-point ideology: 1/2 right, 4/5 left, NA missing, rest centrist
  ideo = 0,
  ideo = ifelse(rile %in% c(1, 2), 1, ideo),
  ideo = ifelse(rile %in% c(4, 5), -1, ideo),
  ideo = ifelse(rile %in% NA, NA, ideo),
  ideo = factor(
    ideo,
    levels = c(-1, 1, 0),
    labels = c("Left-wing", "Right-wing", "Centrist")
  )
)

var_label(USdata$r_party) <- "Identifies with right-wing party (vs left-wing)"
var_label(USdata$r_vote) <- "Voted for right-wing party (vs left-wing)"
var_label(USdata$r_ideo) <- "Self-identifies as right-wing (vs left-wing)"
var_label(USdata$ideo) <- "3 point Ideological self-placement (left-centre-right)"

# Household income as a number
USdata$hhinc <- as.numeric(USdata$hhinc)

# Collapse income into four groups: <16, 16-20, 21-22 and >=23
# (a missing hhinc yields a missing hhinc4)
USdata <- USdata %>%
  mutate(hhinc4 = 0) %>%
  mutate(hhinc4 = ifelse(hhinc < 16, 1, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 16 & hhinc < 21, 2, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 21 & hhinc < 23, 3, hhinc4)) %>%
  mutate(hhinc4 = ifelse(hhinc >= 23, 4, hhinc4))

USdata$hhinc4 <- factor(USdata$hhinc4,
                        levels = c(1, 2, 3, 4),
                        labels = c("1.Low", "2.Middle", "3.High", "4.Very high"))
var_label(USdata$hhinc4) <- "4 Income groups"

# Equal treatment: only 0 and 1 are valid levels, so every other value
# (including the empty string) becomes NA when the factor is built
USdata$equal <- factor(USdata$equal,
                       levels = c(0, 1),
                       labels = c("No", "Yes"))
# Attach the label after factor(): factor() builds a new vector and does not
# carry over a label set beforehand
var_label(USdata$equal) <- "Equal treatment"

#Country identifier
USdata$country <- 1

# Weights
# Export the variables used to build the weights, with the respondent key
# renamed to `responseid`
weightsUS <- select(
  USdata,
  responseid = Response.ID, male, age, ethnicity, educ, hhinc, zip, partyid
)
write_dta(weightsUS, "weights_us.dta")

# The weights are constructed outside R with a Stata do-file; read the result
# back and attach it to every US row, keeping rows without a match
weights <- read.dta("weights_us_ready.dta")
USdata <- dplyr::rename(USdata, responseid = Response.ID)
USdata <- merge(USdata, weights, by = "responseid", all.x = TRUE)


##Combine data frames
# Add the weighting variable for non-US responses (constant weight of 1)
ARGdata$webal <- 1
CHdata$webal <- 1
AUSdata$webal <- 1

# Keep the same variables in all data frames
variables_to_keep <- c(
  "equal", "ineq", "task", "profile", "choice", "source", "tax", "tax2", "level", "income",
  "r_party", "r_vote", "r_ideo", "ideo", "hhinc4", "country", "id", "webal"
)
data_frames <- lapply(
  list(ARGdata, CHdata, USdata, AUSdata),
  function(country_data) {
    dplyr::select(country_data, all_of(variables_to_keep))
  }
)

ARGdata <- data_frames[[1]]
CHdata <- data_frames[[2]]
USdata <- data_frames[[3]]
AUSdata <- data_frames[[4]]

# Stack the four countries.
# NOTE(review): `id` is numbered separately within each country, so the same
# id value occurs in several countries; combine it with `country` wherever a
# unique respondent key is needed.
cj4 <- bind_rows(ARGdata, AUSdata, USdata, CHdata)

# Label the country codes. The codes assigned in the country sections are
# 1 = US, 2 = Australia, 3 = Argentina, 4 = Chile, so the labels must follow
# that order (labelling 3 as Chile and 4 as Argentina would swap the two
# samples).
cj4$country <- factor(cj4$country,
                      levels = c(1, 2, 3, 4),
                      labels = c("US", "Australia", "Argentina", "Chile"))

##Prepare justification data for wordclouds
# Use the first task only, spread its two profiles over columns (suffixes _1
# and _2), recode the choice so that 1 = profile 1 chosen and 2 = profile 2
# chosen, and keep a single copy of the choice and justification columns.
justUS <- justUS %>%
  filter(task == 1) %>%
  select(-task) %>%
  pivot_wider(
    id_cols = Response.ID,
    names_from = profile,
    values_from = c(level, source, tax, choice, just)
  ) %>%
  mutate(choice_1 = ifelse(choice_1 == 0, 2, choice_1)) %>%
  select(-just_2, -choice_2) %>%
  rename(choice = choice_1, just = just_1)

# The ch_* indicators are 1 when the chosen profile has the property, 0 when
# the rejected one has it, and NA when the two profiles do not differ on it.
justUS <- mutate(
  justUS,
  # factor codes of level: a larger code is a higher income level
  level_1 = as.numeric(level_1),
  level_2 = as.numeric(level_2),
  ch_higherlevel = NA,
  ch_higherlevel = ifelse(level_1 > level_2 & choice == 1, 1, ch_higherlevel),
  ch_higherlevel = ifelse(level_1 < level_2 & choice == 2, 1, ch_higherlevel),
  ch_higherlevel = ifelse(level_1 > level_2 & choice == 2, 0, ch_higherlevel),
  ch_higherlevel = ifelse(level_1 < level_2 & choice == 1, 0, ch_higherlevel),
  # chose (1) or rejected (0) the "State benefit" profile; NA when both or
  # neither profile has that source
  ch_state = NA,
  ch_state = ifelse(source_1 == "State benefit" & choice == 1, 1, ch_state),
  ch_state = ifelse(source_2 == "State benefit" & choice == 2, 1, ch_state),
  ch_state = ifelse(source_1 == "State benefit" & choice == 2, 0, ch_state),
  ch_state = ifelse(source_2 == "State benefit" & choice == 1, 0, ch_state),
  ch_state = ifelse(source_1 == "State benefit" & source_2 == "State benefit", NA, ch_state),
  # factor codes of tax: the levels run 10%, 5%, 1%, so a larger code is a
  # lower tax rate
  tax_1 = as.numeric(tax_1),
  tax_2 = as.numeric(tax_2),
  ch_lowertax = NA,
  ch_lowertax = ifelse(tax_1 > tax_2 & choice == 1, 1, ch_lowertax),
  ch_lowertax = ifelse(tax_1 < tax_2 & choice == 2, 1, ch_lowertax),
  ch_lowertax = ifelse(tax_1 > tax_2 & choice == 2, 0, ch_lowertax),
  ch_lowertax = ifelse(tax_1 < tax_2 & choice == 1, 0, ch_lowertax)
)

#To make figures see replicate_figures.R
